Personal Computer World 2009 February

home *** CD-ROM | disk | FTP | other *** search

/ Personal Computer World 2009 February / PCWFEB09.iso / Software / Linux / Kubuntu 8.10 / kubuntu-8.10-desktop-i386.iso / casper / filesystem.squashfs / usr / lib / python2.5 / urlparse.pyc (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2008-10-29 | 12KB | 474 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.5)
#
# NOTE: the raw decompiler output was not valid Python (anonymous ".0"
# parameters, "while None:" loops, lost "return"/"else" clauses, mangled
# boolean grouping and import statements).  Those constructs have been
# repaired by hand below; everything else follows the decompiled logic.
'''Parse (absolute and relative) URLs.

See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
'''

__all__ = ['urlparse', 'urlunparse', 'urljoin', 'urldefrag',
           'urlsplit', 'urlunsplit']

# Scheme classification tables.  The empty string stands for "no scheme
# given"; urlsplit() and friends look the (possibly empty) scheme up in
# these lists to decide which components to split off.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp']
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp']
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters accepted in a scheme name by urlsplit().
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# urlsplit() memoizes its results in _parse_cache; once the cache holds
# MAX_CACHE_SIZE entries it is emptied wholesale before the next insert.
MAX_CACHE_SIZE = 20
_parse_cache = {}


def clear_cache():
    '''Clear the parse cache.'''
    global _parse_cache
    _parse_cache = {}


class BaseResult(tuple):
    '''Base class for the parsed result objects.

    This provides the attributes shared by the two derived result
    objects as read-only properties.  The derived classes are
    responsible for checking the right number of arguments were
    supplied to the constructor.
    '''

    __slots__ = ()

    # Positional components.  query and fragment are indexed from the
    # end so the same properties work for both the 5-tuple and the
    # 6-tuple subclasses.

    @property
    def scheme(self):
        return self[0]

    @property
    def netloc(self):
        return self[1]

    @property
    def path(self):
        return self[2]

    @property
    def query(self):
        return self[-2]

    @property
    def fragment(self):
        return self[-1]

    # Components derived from netloc ("user:password@host:port").

    @property
    def username(self):
        '''User name from the netloc, or None if it has no "@".'''
        netloc = self.netloc
        if '@' in netloc:
            userinfo = netloc.split('@', 1)[0]
            if ':' in userinfo:
                userinfo = userinfo.split(':', 1)[0]
            return userinfo
        return None

    @property
    def password(self):
        '''Password from the netloc, or None if none is present.'''
        netloc = self.netloc
        if '@' in netloc:
            userinfo = netloc.split('@', 1)[0]
            if ':' in userinfo:
                return userinfo.split(':', 1)[1]
        return None

    @property
    def hostname(self):
        '''Lower-cased host part of the netloc, or None if empty.'''
        netloc = self.netloc
        if '@' in netloc:
            netloc = netloc.split('@', 1)[1]
        if ':' in netloc:
            netloc = netloc.split(':', 1)[0]
        # The decompiler dropped this return, leaving hostname always None.
        return netloc.lower() or None

    @property
    def port(self):
        '''Port number as an int, or None if the netloc has no ":".

        Raises ValueError if the text after ":" is not a base-10 integer.
        '''
        netloc = self.netloc
        if '@' in netloc:
            netloc = netloc.split('@', 1)[1]
        if ':' in netloc:
            port = netloc.split(':', 1)[1]
            return int(port, 10)
        return None


class SplitResult(BaseResult):
    '''5-tuple result of urlsplit():
    (scheme, netloc, path, query, fragment).'''

    __slots__ = ()

    def __new__(cls, scheme, netloc, path, query, fragment):
        return BaseResult.__new__(
            cls, (scheme, netloc, path, query, fragment))

    def geturl(self):
        '''Reassemble the components with urlunsplit().'''
        return urlunsplit(self)


class ParseResult(BaseResult):
    '''6-tuple result of urlparse():
    (scheme, netloc, path, params, query, fragment).'''

    __slots__ = ()

    def __new__(cls, scheme, netloc, path, params, query, fragment):
        return BaseResult.__new__(
            cls, (scheme, netloc, path, params, query, fragment))

    @property
    def params(self):
        return self[3]

    def geturl(self):
        '''Reassemble the components with urlunparse().'''
        return urlunparse(self)


def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    scheme, netloc, url, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    return ParseResult(scheme, netloc, url, params, query, fragment)


def _splitparams(url):
    '''Split url into (path, params) at the first ";" of its last
    "/"-separated segment; params is '' when that segment has none.'''
    if '/' in url:
        i = url.find(';', url.rfind('/'))
        if i < 0:
            return url, ''
    else:
        i = url.find(';')
    return url[:i], url[i + 1:]


def _splitnetloc(url, start=0):
    '''Return (netloc, rest): url[start:] up to the earliest of "/",
    "?" or "#" (or the end of url), and everything from there on.'''
    delim = len(url)
    for c in '/?#':
        wdelim = url.find(c, start)
        if wdelim >= 0:
            delim = min(delim, wdelim)
    return url[start:delim], url[delim:]


def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key so that equal-comparing
    # values of different types do not share a cache entry.
    key = (url, scheme, allow_fragments, type(url), type(scheme))
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http':
            # Fast path for the common case: no table lookups needed.
            scheme = url[:i].lower()
            url = url[i + 1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
        # Only treat the text before ":" as a scheme when every
        # character of it is a legal scheme character.
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme = url[:i].lower()
            url = url[i + 1:]
    if scheme in uses_netloc and url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v


def urlunparse(data):
    '''Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent).

    data is a 6-item sequence:
    (scheme, netloc, url, params, query, fragment).'''
    scheme, netloc, url, params, query, fragment = data
    if params:
        url = '%s;%s' % (url, params)
    return urlunsplit((scheme, netloc, url, query, fragment))


def urlunsplit(data):
    '''Combine a 5-item sequence (scheme, netloc, url, query, fragment)
    into a single URL string; empty components are omitted.'''
    scheme, netloc, url, query, fragment = data
    # A netloc always forces the "//" prefix; without one, the prefix
    # is still emitted for netloc-using schemes unless the path already
    # begins with "//".
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url


def urljoin(base, url, allow_fragments=True):
    '''Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.'''
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = urlparse(
        base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = urlparse(
        url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not (path or params or query):
        # Nothing but (at most) a fragment: keep the base's path,
        # params and query.
        return urlunparse((scheme, netloc, bpath,
                           bparams, bquery, fragment))
    # Replace the last segment of the base path with the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly collapse the first "<name>/.." pair until none is left.
    # Index 0 and the last segment are never collapsed by this loop.
    while True:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                    and segments[i - 1] not in ('', '..')):
                del segments[i - 1:i + 1]
                break
            i = i + 1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))


def urldefrag(url):
    '''Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    '''
    if '#' in url:
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
        return defrag, frag
    return url, ''


# Input for test(): the first non-blank line is the base URL; each
# following line reads "<relative> = <URL:expected>".
test_input = (
    '\n'
    ' http://a/b/c/d\n'
    '\n'
    ' g:h = <URL:g:h>\n'
    ' http:g = <URL:http://a/b/c/g>\n'
    ' http: = <URL:http://a/b/c/d>\n'
    ' g = <URL:http://a/b/c/g>\n'
    ' ./g = <URL:http://a/b/c/g>\n'
    ' g/ = <URL:http://a/b/c/g/>\n'
    ' /g = <URL:http://a/g>\n'
    ' //g = <URL:http://g>\n'
    ' ?y = <URL:http://a/b/c/d?y>\n'
    ' g?y = <URL:http://a/b/c/g?y>\n'
    ' g?y/./x = <URL:http://a/b/c/g?y/./x>\n'
    ' . = <URL:http://a/b/c/>\n'
    ' ./ = <URL:http://a/b/c/>\n'
    ' .. = <URL:http://a/b/>\n'
    ' ../ = <URL:http://a/b/>\n'
    ' ../g = <URL:http://a/b/g>\n'
    ' ../.. = <URL:http://a/>\n'
    ' ../../g = <URL:http://a/g>\n'
    ' ../../../g = <URL:http://a/../g>\n'
    ' ./../g = <URL:http://a/b/g>\n'
    ' ./g/. = <URL:http://a/b/c/g/>\n'
    ' /./g = <URL:http://a/./g>\n'
    ' g/./h = <URL:http://a/b/c/g/h>\n'
    ' g/../h = <URL:http://a/b/c/h>\n'
    ' http:g = <URL:http://a/b/c/g>\n'
    ' http: = <URL:http://a/b/c/d>\n'
    ' http:?y = <URL:http://a/b/c/d?y>\n'
    ' http:g?y = <URL:http://a/b/c/g?y>\n'
    ' http:g?y/./x = <URL:http://a/b/c/g?y/./x>\n'
)


def test():
    '''Command-line self test.

    Reads lines from the file named by sys.argv[1] ("-" means stdin),
    or from test_input when no argument is given.  The first URL read
    becomes the base; every URL is parsed and joined against the base,
    both results are printed, and a mismatch with the expected
    "<URL:...>" column is reported.
    '''
    import sys
    base = ''
    opened = None  # file opened here, if any, so it can be closed again
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = opened = open(fn)
    else:
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                # Neither Python 2 module exists on Python 3.
                from io import StringIO
        fp = StringIO(test_input)
    try:
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            if not words:
                continue
            url = words[0]
            parts = urlparse(url)
            # Single-argument print(...) produces the same output under
            # both the Python 2 statement and the Python 3 function.
            print('%-10s : %s' % (url, parts))
            joined = urljoin(base, url)
            if not base:
                base = joined
            wrapped = '<URL:%s>' % joined
            print('%-10s = %s' % (url, wrapped))
            if len(words) == 3 and words[1] == '=':
                if wrapped != words[2]:
                    print('EXPECTED %s !!!!!!!!!!' % words[2])
    finally:
        if opened is not None:
            opened.close()


if __name__ == '__main__':
    test()